library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(plotly)
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
Life Expectancy dataset from Our World in Data.
# Load the life-expectancy table from disk; readr guesses the column types.
le_df <- readr::read_csv(file = "data/life-expectancy.csv")
## Rows: 20755 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Entity, Code
## dbl (2): Year, Period life expectancy at birth - Sex: all - Age: 0
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Structural overview: dimensions plus a preview of every column.
le_df %>% glimpse()
## Rows: 20,755
## Columns: 4
## $ Entity <chr> "Afghanistan", "…
## $ Code <chr> "AFG", "AFG", "A…
## $ Year <dbl> 1950, 1951, 1952…
## $ `Period life expectancy at birth - Sex: all - Age: 0` <dbl> 27.7275, 27.9634…
# Normalise the column names with janitor, then give the verbose
# life-expectancy column a short name that is convenient to type.
le_clean <- le_df %>%
  janitor::clean_names() %>%
  rename(period_life_expect = period_life_expectancy_at_birth_sex_all_age_0)
glimpse(le_clean)
## Rows: 20,755
## Columns: 4
## $ entity <chr> "Afghanistan", "Afghanistan", "Afghanistan", "Afgha…
## $ code <chr> "AFG", "AFG", "AFG", "AFG", "AFG", "AFG", "AFG", "A…
## $ year <dbl> 1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 195…
## $ period_life_expect <dbl> 27.7275, 27.9634, 28.4456, 28.9304, 29.2258, 29.920…
The dataset includes countries, continents, the world as a whole, and groups defined by development status and income level.
# List every distinct entity name present in the data.
unique(le_clean$entity)
## [1] "Afghanistan"
## [2] "Africa"
## [3] "Albania"
## [4] "Algeria"
## [5] "American Samoa"
## [6] "Americas"
## [7] "Andorra"
## [8] "Angola"
## [9] "Anguilla"
## [10] "Antigua and Barbuda"
## [11] "Argentina"
## [12] "Armenia"
## [13] "Aruba"
## [14] "Asia"
## [15] "Australia"
## [16] "Austria"
## [17] "Azerbaijan"
## [18] "Bahamas"
## [19] "Bahrain"
## [20] "Bangladesh"
## [21] "Barbados"
## [22] "Belarus"
## [23] "Belgium"
## [24] "Belize"
## [25] "Benin"
## [26] "Bermuda"
## [27] "Bhutan"
## [28] "Bolivia"
## [29] "Bonaire Sint Eustatius and Saba"
## [30] "Bosnia and Herzegovina"
## [31] "Botswana"
## [32] "Brazil"
## [33] "British Virgin Islands"
## [34] "Brunei"
## [35] "Bulgaria"
## [36] "Burkina Faso"
## [37] "Burundi"
## [38] "Cambodia"
## [39] "Cameroon"
## [40] "Canada"
## [41] "Cape Verde"
## [42] "Cayman Islands"
## [43] "Central African Republic"
## [44] "Chad"
## [45] "Chile"
## [46] "China"
## [47] "Colombia"
## [48] "Comoros"
## [49] "Congo"
## [50] "Cook Islands"
## [51] "Costa Rica"
## [52] "Cote d'Ivoire"
## [53] "Croatia"
## [54] "Cuba"
## [55] "Curacao"
## [56] "Cyprus"
## [57] "Czechia"
## [58] "Democratic Republic of Congo"
## [59] "Denmark"
## [60] "Djibouti"
## [61] "Dominica"
## [62] "Dominican Republic"
## [63] "East Timor"
## [64] "Ecuador"
## [65] "Egypt"
## [66] "El Salvador"
## [67] "England and Wales"
## [68] "Equatorial Guinea"
## [69] "Eritrea"
## [70] "Estonia"
## [71] "Eswatini"
## [72] "Ethiopia"
## [73] "Europe"
## [74] "Falkland Islands"
## [75] "Faroe Islands"
## [76] "Fiji"
## [77] "Finland"
## [78] "France"
## [79] "French Guiana"
## [80] "French Polynesia"
## [81] "Gabon"
## [82] "Gambia"
## [83] "Georgia"
## [84] "Germany"
## [85] "Ghana"
## [86] "Gibraltar"
## [87] "Greece"
## [88] "Greenland"
## [89] "Grenada"
## [90] "Guadeloupe"
## [91] "Guam"
## [92] "Guatemala"
## [93] "Guernsey"
## [94] "Guinea"
## [95] "Guinea-Bissau"
## [96] "Guyana"
## [97] "Haiti"
## [98] "High-income countries"
## [99] "Honduras"
## [100] "Hong Kong"
## [101] "Hungary"
## [102] "Iceland"
## [103] "India"
## [104] "Indonesia"
## [105] "Iran"
## [106] "Iraq"
## [107] "Ireland"
## [108] "Isle of Man"
## [109] "Israel"
## [110] "Italy"
## [111] "Jamaica"
## [112] "Japan"
## [113] "Jersey"
## [114] "Jordan"
## [115] "Kazakhstan"
## [116] "Kenya"
## [117] "Kiribati"
## [118] "Kosovo"
## [119] "Kuwait"
## [120] "Kyrgyzstan"
## [121] "Land-locked Developing Countries (LLDC)"
## [122] "Laos"
## [123] "Latin America and the Caribbean"
## [124] "Latvia"
## [125] "Least developed countries"
## [126] "Lebanon"
## [127] "Lesotho"
## [128] "Less developed regions"
## [129] "Less developed regions, excluding China"
## [130] "Less developed regions, excluding least developed countries"
## [131] "Liberia"
## [132] "Libya"
## [133] "Liechtenstein"
## [134] "Lithuania"
## [135] "Low-income countries"
## [136] "Lower-middle-income countries"
## [137] "Luxembourg"
## [138] "Macao"
## [139] "Madagascar"
## [140] "Malawi"
## [141] "Malaysia"
## [142] "Maldives"
## [143] "Mali"
## [144] "Malta"
## [145] "Marshall Islands"
## [146] "Martinique"
## [147] "Mauritania"
## [148] "Mauritius"
## [149] "Mayotte"
## [150] "Mexico"
## [151] "Micronesia (country)"
## [152] "Middle-income countries"
## [153] "Moldova"
## [154] "Monaco"
## [155] "Mongolia"
## [156] "Montenegro"
## [157] "Montserrat"
## [158] "More developed regions"
## [159] "Morocco"
## [160] "Mozambique"
## [161] "Myanmar"
## [162] "Namibia"
## [163] "Nauru"
## [164] "Nepal"
## [165] "Netherlands"
## [166] "New Caledonia"
## [167] "New Zealand"
## [168] "Nicaragua"
## [169] "Niger"
## [170] "Nigeria"
## [171] "Niue"
## [172] "No income group available"
## [173] "North Korea"
## [174] "North Macedonia"
## [175] "Northern America"
## [176] "Northern Ireland"
## [177] "Northern Mariana Islands"
## [178] "Norway"
## [179] "Oceania"
## [180] "Oman"
## [181] "Pakistan"
## [182] "Palau"
## [183] "Palestine"
## [184] "Panama"
## [185] "Papua New Guinea"
## [186] "Paraguay"
## [187] "Peru"
## [188] "Philippines"
## [189] "Poland"
## [190] "Portugal"
## [191] "Puerto Rico"
## [192] "Qatar"
## [193] "Reunion"
## [194] "Romania"
## [195] "Russia"
## [196] "Rwanda"
## [197] "Saint Barthelemy"
## [198] "Saint Helena"
## [199] "Saint Kitts and Nevis"
## [200] "Saint Lucia"
## [201] "Saint Martin (French part)"
## [202] "Saint Pierre and Miquelon"
## [203] "Saint Vincent and the Grenadines"
## [204] "Samoa"
## [205] "San Marino"
## [206] "Sao Tome and Principe"
## [207] "Saudi Arabia"
## [208] "Scotland"
## [209] "Senegal"
## [210] "Serbia"
## [211] "Seychelles"
## [212] "Sierra Leone"
## [213] "Singapore"
## [214] "Sint Maarten (Dutch part)"
## [215] "Slovakia"
## [216] "Slovenia"
## [217] "Small Island Developing States (SIDS)"
## [218] "Solomon Islands"
## [219] "Somalia"
## [220] "South Africa"
## [221] "South Korea"
## [222] "South Sudan"
## [223] "Spain"
## [224] "Sri Lanka"
## [225] "Sudan"
## [226] "Suriname"
## [227] "Sweden"
## [228] "Switzerland"
## [229] "Syria"
## [230] "Taiwan"
## [231] "Tajikistan"
## [232] "Tanzania"
## [233] "Thailand"
## [234] "Togo"
## [235] "Tokelau"
## [236] "Tonga"
## [237] "Trinidad and Tobago"
## [238] "Tunisia"
## [239] "Turkey"
## [240] "Turkmenistan"
## [241] "Turks and Caicos Islands"
## [242] "Tuvalu"
## [243] "USSR"
## [244] "Uganda"
## [245] "Ukraine"
## [246] "United Arab Emirates"
## [247] "United Kingdom"
## [248] "United States"
## [249] "United States Virgin Islands"
## [250] "Upper-middle-income countries"
## [251] "Uruguay"
## [252] "Uzbekistan"
## [253] "Vanuatu"
## [254] "Venezuela"
## [255] "Vietnam"
## [256] "Wallis and Futuna"
## [257] "Western Sahara"
## [258] "World"
## [259] "Yemen"
## [260] "Zambia"
## [261] "Zimbabwe"
# Plot the pattern of missing values across the columns of the cleaned data.
le_clean %>% visdat::vis_miss()
Which entities are missing codes?
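Mostly aggregates rather than countries: continents and regions grouped by income or development status, plus a few other non-country entities (e.g. England and Wales).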
# Entities that have no code, one row per entity.
le_clean %>%
  filter(is.na(code)) %>%
  distinct(entity)
## # A tibble: 23 × 1
## entity
## <chr>
## 1 Africa
## 2 Americas
## 3 Asia
## 4 England and Wales
## 5 Europe
## 6 High-income countries
## 7 Land-locked Developing Countries (LLDC)
## 8 Latin America and the Caribbean
## 9 Least developed countries
## 10 Less developed regions
## # ℹ 13 more rows
# Line chart of life expectancy over time for the aggregate "World" entity.
p <- ggplot(
  data = filter(le_clean, entity == "World"),
  mapping = aes(x = year, y = period_life_expect)
) +
  geom_line() +
  ggtitle("World Life Expectancy by Year") +
  xlab("Year") +
  ylab("Life Expectancy at Birth (years)")
# Render the static ggplot as an interactive plotly widget.
ggplotly(p)
# Entity names that mention "devel" (case-insensitive), i.e. the
# development-status groupings, in order of first appearance.
development_status <- le_clean %>%
  distinct(entity) %>%
  pull(entity) %>%
  stringr::str_subset(regex("devel", ignore_case = TRUE))

# One line per development-status group, coloured by group.
p <- ggplot(
  data = filter(le_clean, entity %in% development_status),
  mapping = aes(x = year, y = period_life_expect, color = entity)
) +
  geom_line() +
  ggtitle("Life Expectancy by Year") +
  xlab("Year") +
  ylab("Life Expectancy at Birth (years)")
ggplotly(p)
How many countries have data before 1950?
# Number of distinct coded entities with at least one observation before 1950.
le_clean %>%
  filter(!is.na(code), year < 1950) %>%
  pull(entity) %>%
  n_distinct()
## [1] 87
Only 87 of 238 countries have data before 1950.
# Count the coded entities that have an observation for every year from 1950
# through the latest year present in the data.
# The expected number of years is derived from the data rather than being
# hard-coded, and distinct years are counted so that a duplicated row cannot
# stand in for a missing year.
le_clean %>%
  drop_na(code) %>%
  filter(year >= 1950) %>%
  mutate(n_expected = max(year) - 1950 + 1) %>%
  summarise(
    n_years = n_distinct(year),
    n_expected = first(n_expected),
    .by = entity
  ) %>%
  filter(n_years == n_expected) %>%
  nrow()
## [1] 237
237 of 238 countries have data every year since 1950.
# Total number of distinct entities that carry a code.
le_clean %>%
  filter(!is.na(code)) %>%
  pull(entity) %>%
  n_distinct()
## [1] 238
Create a new column that calculates the difference in life expectancy from the previous year.
# Add `diff`: the change in life expectancy relative to the entity's previous
# observation. The lag is ordered explicitly by `year`, so the result does not
# depend on the row order of the input file; the first observation of each
# entity gets NA.
# NOTE(review): for entities whose years are not consecutive, this is the
# change since the previous *available* year, not necessarily one year earlier.
le_clean <- le_clean %>%
  group_by(entity) %>%
  mutate(
    diff = period_life_expect -
      dplyr::lag(period_life_expect, order_by = year)
  ) %>%
  ungroup()
# Year-over-year change in life expectancy for each development-status group.
p <- le_clean %>%
  filter(entity %in% development_status) %>%
  drop_na(diff) %>%
  ggplot(aes(x = year,
             y = diff,
             color = entity)) +
  geom_line() +
  # Dotted horizontal reference line (a horizontal rule is what the original
  # zero-slope abline drew).
  # NOTE(review): the origin of 0.243 is not recorded in this file —
  # presumably a benchmark annual gain in years; confirm and name it.
  geom_hline(yintercept = 0.243, color = "black", linetype = "dotted") +
  # The y axis shows the change, not the level, of life expectancy.
  labs(title = "Life Expectancy Diff by Year",
       x = "Year",
       y = "Change in Life Expectancy from Previous Year (years)")
ggplotly(p)
# Build a named list holding one annual time series per development-status
# group (one element for every name in `development_status`).
# Each series covers 1950 onwards; its start year is read from the data and
# the rows are sorted by year, so the time index cannot silently drift from
# the observations. Gaps or duplicate years stop the script instead of
# producing a mislabelled series.
le_dev_ls <- development_status %>%
  purrr::set_names() %>%
  purrr::map(function(dev) {
    dev_df <- le_clean %>%
      filter(entity == dev, year >= 1950) %>%
      arrange(year)
    # ts() assumes equally spaced observations: require consecutive years.
    stopifnot(all(base::diff(dev_df$year) == 1))
    dev_df %>%
      select(period_life_expect) %>%
      ts(start = min(dev_df$year), frequency = 1)
  })
le_dev_ls
## $`Land-locked Developing Countries (LLDC)`
## Time Series:
## Start = 1950
## End = 2021
## Frequency = 1
## period_life_expect
## [1,] 39.4836
## [2,] 39.7985
## [3,] 40.2089
## [4,] 40.6129
## [5,] 40.9987
## [6,] 41.4098
## [7,] 41.8012
## [8,] 40.8860
## [9,] 41.2702
## [10,] 42.9791
## [11,] 43.3855
## [12,] 43.8382
## [13,] 44.2265
## [14,] 44.4622
## [15,] 45.0320
## [16,] 45.1144
## [17,] 44.7641
## [18,] 45.4924
## [19,] 45.7305
## [20,] 45.9128
## [21,] 46.1592
## [22,] 46.4167
## [23,] 45.8912
## [24,] 46.7321
## [25,] 46.6755
## [26,] 46.9242
## [27,] 47.7069
## [28,] 47.9663
## [29,] 48.1405
## [30,] 48.5353
## [31,] 48.9473
## [32,] 49.4773
## [33,] 49.7332
## [34,] 48.1001
## [35,] 47.9810
## [36,] 48.4865
## [37,] 49.2029
## [38,] 50.5464
## [39,] 49.9685
## [40,] 51.3800
## [41,] 51.2460
## [42,] 51.5154
## [43,] 51.1122
## [44,] 51.0153
## [45,] 48.1917
## [46,] 51.9796
## [47,] 52.1104
## [48,] 52.3150
## [49,] 51.7472
## [50,] 52.9955
## [51,] 53.5791
## [52,] 53.9245
## [53,] 54.5475
## [54,] 55.2809
## [55,] 56.0211
## [56,] 56.6869
## [57,] 57.4662
## [58,] 58.1920
## [59,] 58.9636
## [60,] 59.6964
## [61,] 60.4286
## [62,] 61.1749
## [63,] 61.8342
## [64,] 62.4491
## [65,] 62.9001
## [66,] 63.3501
## [67,] 63.9259
## [68,] 64.2752
## [69,] 64.6310
## [70,] 64.9831
## [71,] 64.0608
## [72,] 63.7099
##
## $`Least developed countries`
## Time Series:
## Start = 1950
## End = 2021
## Frequency = 1
## period_life_expect
## [1,] 36.7606
## [2,] 37.1166
## [3,] 37.6441
## [4,] 38.0314
## [5,] 38.5840
## [6,] 39.0524
## [7,] 39.4372
## [8,] 39.2478
## [9,] 39.5448
## [10,] 40.5973
## [11,] 40.9301
## [12,] 41.3303
## [13,] 41.7962
## [14,] 41.9248
## [15,] 42.6287
## [16,] 42.6081
## [17,] 42.9056
## [18,] 43.5074
## [19,] 43.8672
## [20,] 44.1094
## [21,] 42.8667
## [22,] 38.3186
## [23,] 44.5179
## [24,] 45.2031
## [25,] 45.1392
## [26,] 43.7658
## [27,] 44.3664
## [28,] 46.2304
## [29,] 46.6554
## [30,] 47.1651
## [31,] 47.6444
## [32,] 47.9803
## [33,] 48.2630
## [34,] 47.3425
## [35,] 47.3111
## [36,] 47.6962
## [37,] 48.4363
## [38,] 48.9276
## [39,] 48.4161
## [40,] 49.9467
## [41,] 50.1401
## [42,] 49.7829
## [43,] 50.2226
## [44,] 50.8393
## [45,] 49.9143
## [46,] 52.1806
## [47,] 52.3293
## [48,] 52.9811
## [49,] 52.9004
## [50,] 54.3557
## [51,] 55.0574
## [52,] 55.5683
## [53,] 56.0698
## [54,] 56.7153
## [55,] 57.2381
## [56,] 57.8142
## [57,] 58.4518
## [58,] 58.9287
## [59,] 59.1136
## [60,] 60.1000
## [61,] 60.5229
## [62,] 61.3113
## [63,] 61.9123
## [64,] 62.3808
## [65,] 62.8141
## [66,] 63.2189
## [67,] 63.7597
## [68,] 64.1711
## [69,] 64.6089
## [70,] 64.9648
## [71,] 64.4953
## [72,] 64.0715
##
## $`Less developed regions`
## Time Series:
## Start = 1950
## End = 2021
## Frequency = 1
## period_life_expect
## [1,] 41.2643
## [2,] 42.1096
## [3,] 43.2437
## [4,] 43.8704
## [5,] 44.6902
## [6,] 45.2476
## [7,] 45.7751
## [8,] 46.2070
## [9,] 46.6697
## [10,] 44.0977
## [11,] 42.1155
## [12,] 45.0834
## [13,] 48.5162
## [14,] 49.1238
## [15,] 49.7630
## [16,] 49.5482
## [17,] 50.2856
## [18,] 50.8350
## [19,] 51.6293
## [20,] 52.1374
## [21,] 52.4443
## [22,] 52.1331
## [23,] 53.6360
## [24,] 54.2855
## [25,] 54.7315
## [26,] 55.0278
## [27,] 55.5499
## [28,] 56.2761
## [29,] 56.7439
## [30,] 57.2830
## [31,] 57.7537
## [32,] 58.2335
## [33,] 58.6889
## [34,] 58.9221
## [35,] 59.3247
## [36,] 59.7419
## [37,] 60.2440
## [38,] 60.6798
## [39,] 60.8465
## [40,] 61.4357
## [41,] 61.6961
## [42,] 61.7999
## [43,] 62.1409
## [44,] 62.4766
## [45,] 62.5023
## [46,] 63.0250
## [47,] 63.2295
## [48,] 63.5679
## [49,] 63.7939
## [50,] 64.2641
## [51,] 64.6705
## [52,] 65.0843
## [53,] 65.4550
## [54,] 65.8258
## [55,] 66.1018
## [56,] 66.5652
## [57,] 67.0074
## [58,] 67.3330
## [59,] 67.5783
## [60,] 68.0619
## [61,] 68.3973
## [62,] 68.8198
## [63,] 69.2011
## [64,] 69.5546
## [65,] 69.9243
## [66,] 70.2258
## [67,] 70.5275
## [68,] 70.7720
## [69,] 71.0587
## [70,] 71.2636
## [71,] 70.6420
## [72,] 69.5611
##
## $`Less developed regions, excluding China`
## Time Series:
## Start = 1950
## End = 2021
## Frequency = 1
## period_life_expect
## [1,] 40.2970
## [2,] 41.1126
## [3,] 42.3175
## [4,] 42.9239
## [5,] 43.7917
## [6,] 44.4349
## [7,] 44.9830
## [8,] 45.3547
## [9,] 45.8433
## [10,] 46.5319
## [11,] 46.9654
## [12,] 47.4022
## [13,] 47.8435
## [14,] 48.2357
## [15,] 48.7273
## [16,] 48.0571
## [17,] 48.8081
## [18,] 49.3294
## [19,] 50.1222
## [20,] 50.5421
## [21,] 50.6060
## [22,] 49.8281
## [23,] 51.6489
## [24,] 52.2434
## [25,] 52.6560
## [26,] 52.9095
## [27,] 53.4155
## [28,] 54.2625
## [29,] 54.7306
## [30,] 55.2581
## [31,] 55.7042
## [32,] 56.1442
## [33,] 56.5427
## [34,] 56.7694
## [35,] 57.1622
## [36,] 57.5732
## [37,] 58.0727
## [38,] 58.4791
## [39,] 58.6052
## [40,] 59.2737
## [41,] 59.5398
## [42,] 59.7154
## [43,] 60.0337
## [44,] 60.3623
## [45,] 60.3318
## [46,] 60.9079
## [47,] 61.1326
## [48,] 61.4810
## [49,] 61.6547
## [50,] 62.2149
## [51,] 62.6002
## [52,] 62.9321
## [53,] 63.3150
## [54,] 63.6923
## [55,] 63.9489
## [56,] 64.4419
## [57,] 64.8884
## [58,] 65.2284
## [59,] 65.5018
## [60,] 65.9783
## [61,] 66.3382
## [62,] 66.7941
## [63,] 67.1808
## [64,] 67.5765
## [65,] 67.9774
## [66,] 68.3150
## [67,] 68.6312
## [68,] 68.9489
## [69,] 69.2067
## [70,] 69.4320
## [71,] 68.5773
## [72,] 67.1767
##
## $`Less developed regions, excluding least developed countries`
## Time Series:
## Start = 1950
## End = 2021
## Frequency = 1
## period_life_expect
## [1,] 41.9655
## [2,] 42.8733
## [3,] 44.0838
## [4,] 44.7391
## [5,] 45.5911
## [6,] 46.1583
## [7,] 46.7097
## [8,] 47.2363
## [9,] 47.7300
## [10,] 44.6174
## [11,] 42.3165
## [12,] 45.6754
## [13,] 49.5745
## [14,] 50.2328
## [15,] 50.8476
## [16,] 50.5967
## [17,] 51.4094
## [18,] 51.9605
## [19,] 52.8300
## [20,] 53.3884
## [21,] 53.9681
## [22,] 54.5085
## [23,] 55.0925
## [24,] 55.7514
## [25,] 56.3123
## [26,] 56.9397
## [27,] 57.4611
## [28,] 58.0116
## [29,] 58.5110
## [30,] 59.0747
## [31,] 59.5564
## [32,] 60.0603
## [33,] 60.5378
## [34,] 61.0239
## [35,] 61.5123
## [36,] 61.9414
## [37,] 62.3851
## [38,] 62.7887
## [39,] 63.1137
## [40,] 63.4971
## [41,] 63.7731
## [42,] 64.0083
## [43,] 64.3440
## [44,] 64.6419
## [45,] 64.8649
## [46,] 65.0858
## [47,] 65.3238
## [48,] 65.6215
## [49,] 65.9361
## [50,] 66.2149
## [51,] 66.5702
## [52,] 66.9624
## [53,] 67.3100
## [54,] 67.6354
## [55,] 67.8627
## [56,] 68.3035
## [57,] 68.7060
## [58,] 68.9963
## [59,] 69.2537
## [60,] 69.6261
## [61,] 69.9534
## [62,] 70.3006
## [63,] 70.6364
## [64,] 70.9670
## [65,] 71.3298
## [66,] 71.6204
## [67,] 71.8805
## [68,] 72.1041
## [69,] 72.3722
## [70,] 72.5593
## [71,] 71.9120
## [72,] 70.7380
##
## $`More developed regions`
## Time Series:
## Start = 1950
## End = 2021
## Frequency = 1
## period_life_expect
## [1,] 63.5226
## [2,] 63.7474
## [3,] 64.8906
## [4,] 65.4510
## [5,] 66.2719
## [6,] 66.8312
## [7,] 67.4525
## [8,] 67.4201
## [9,] 68.4923
## [10,] 68.5434
## [11,] 68.9788
## [12,] 69.3642
## [13,] 69.2322
## [14,] 69.5071
## [15,] 70.0578
## [16,] 70.0328
## [17,] 70.2362
## [18,] 70.3486
## [19,] 70.2421
## [20,] 70.1420
## [21,] 70.4366
## [22,] 70.6864
## [23,] 70.9003
## [24,] 71.0107
## [25,] 71.3220
## [26,] 71.4104
## [27,] 71.5788
## [28,] 71.8867
## [29,] 71.9699
## [30,] 72.1535
## [31,] 72.1446
## [32,] 72.4718
## [33,] 72.7917
## [34,] 72.8019
## [35,] 72.9535
## [36,] 73.0721
## [37,] 73.6632
## [38,] 73.8639
## [39,] 73.9420
## [40,] 74.1252
## [41,] 74.1723
## [42,] 74.2353
## [43,] 74.1841
## [44,] 73.7605
## [45,] 73.8473
## [46,] 73.9420
## [47,] 74.4507
## [48,] 74.8981
## [49,] 75.1678
## [50,] 75.0479
## [51,] 75.2499
## [52,] 75.4903
## [53,] 75.5222
## [54,] 75.6161
## [55,] 76.1057
## [56,] 76.1876
## [57,] 76.7299
## [58,] 77.0591
## [59,] 77.2959
## [60,] 77.7106
## [61,] 77.9313
## [62,] 78.2741
## [63,] 78.4618
## [64,] 78.7052
## [65,] 78.9622
## [66,] 78.9890
## [67,] 79.2764
## [68,] 79.4257
## [69,] 79.5600
## [70,] 79.8152
## [71,] 78.5756
## [72,] 78.0650
##
## $`Small Island Developing States (SIDS)`
## Time Series:
## Start = 1950
## End = 2021
## Frequency = 1
## period_life_expect
## [1,] 48.8265
## [2,] 48.9768
## [3,] 49.9972
## [4,] 50.7508
## [5,] 51.5593
## [6,] 52.3511
## [7,] 52.9864
## [8,] 53.5744
## [9,] 54.1417
## [10,] 54.6788
## [11,] 55.2597
## [12,] 55.7218
## [13,] 56.2235
## [14,] 56.1992
## [15,] 57.0916
## [16,] 57.3051
## [17,] 57.9652
## [18,] 58.3952
## [19,] 58.7534
## [20,] 59.1650
## [21,] 59.6021
## [22,] 59.9933
## [23,] 60.3638
## [24,] 60.7716
## [25,] 61.1315
## [26,] 60.8586
## [27,] 61.1914
## [28,] 61.3368
## [29,] 61.4078
## [30,] 61.6260
## [31,] 62.2545
## [32,] 62.5757
## [33,] 62.9408
## [34,] 63.2342
## [35,] 63.5745
## [36,] 63.8578
## [37,] 64.1750
## [38,] 64.5565
## [39,] 64.8516
## [40,] 65.3362
## [41,] 65.5723
## [42,] 65.7896
## [43,] 66.0467
## [44,] 66.2993
## [45,] 66.4531
## [46,] 66.7833
## [47,] 67.0530
## [48,] 67.2988
## [49,] 67.4484
## [50,] 67.7879
## [51,] 68.4526
## [52,] 68.6337
## [53,] 68.8566
## [54,] 69.0807
## [55,] 69.0064
## [56,] 69.5725
## [57,] 69.8160
## [58,] 70.0658
## [59,] 70.3484
## [60,] 70.6697
## [61,] 67.1929
## [62,] 71.0957
## [63,] 71.3962
## [64,] 71.5950
## [65,] 71.7563
## [66,] 71.9234
## [67,] 72.0044
## [68,] 72.1283
## [69,] 72.2182
## [70,] 72.3776
## [71,] 72.1998
## [72,] 70.8139
# install.packages("forecast")
library(forecast)
## Warning: package 'forecast' was built under R version 4.4.1
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
# Fit an automatically selected ARIMA model to each development-status series;
# the result is a list of fitted models named by group.
le_dev_opt <- purrr::map(
  purrr::set_names(development_status),
  function(dev) auto.arima(le_dev_ls[[dev]])
)
# Plot a forecast from the fitted model of one development-status group.
#
# entity: name of an element of `le_dev_opt`.
# h:      forecast horizon, in periods of the series (years here).
# Returns the ggplot produced by autoplot(), labelled with the group name so
# that the panels can be told apart once arranged in a grid.
plot_le_forecast <- function(entity, h = 5) {
  # Fail early with a clear message rather than forecasting from NULL.
  stopifnot(entity %in% names(le_dev_opt))
  le_dev_opt[[entity]] %>%
    forecast(h = h) %>%
    autoplot() +
    labs(subtitle = entity,
         x = "Year",
         y = "Life Expectancy at Birth (years)")
}

p1 <- plot_le_forecast("Least developed countries")
p2 <- plot_le_forecast("More developed regions")
p3 <- plot_le_forecast(
  "Less developed regions, excluding least developed countries"
)
p4 <- plot_le_forecast("Land-locked Developing Countries (LLDC)")
p5 <- plot_le_forecast("Small Island Developing States (SIDS)")
# display plots side by side
gridExtra::grid.arrange(p1, p2, p3, p4, p5, ncol = 2)
library(tseries)
## Warning: package 'tseries' was built under R version 4.4.1
# Year-over-year changes for the development-status groups only, keeping the
# rows where entity, year and diff are all present.
le_dev <- le_clean %>%
  select(entity, year, diff) %>%
  filter(entity %in% development_status) %>%
  drop_na()
# Augmented Dickey-Fuller test on the differenced series of one group.
# The argument expression is kept as a single pipeline because adf.test()
# echoes it as the "data:" line of its printed result.
adf.test(
  le_dev %>% filter(entity == "More developed regions") %>% pull(diff)
)
##
## Augmented Dickey-Fuller Test
##
## data: le_dev %>% filter(entity == "More developed regions") %>% pull(diff)
## Dickey-Fuller = -2.5177, Lag order = 4, p-value = 0.3646
## alternative hypothesis: stationary
# Load the vaccination-coverage table from disk; readr guesses the column types.
vaccination_df <- readr::read_csv(file = "data/global-vaccination-coverage.csv")
## Rows: 7897 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Entity, Code
## dbl (12): Year, BCG (% of one-year-olds immunized), HepB3 (% of one-year-old...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Structural overview: dimensions plus a preview of every column.
glimpse(vaccination_df)
## Rows: 7,897
## Columns: 14
## $ Entity <chr> "Afghanistan", "Afghanistan", "…
## $ Code <chr> "AFG", "AFG", "AFG", "AFG", "AF…
## $ Year <dbl> 1982, 1983, 1984, 1985, 1986, 1…
## $ `BCG (% of one-year-olds immunized)` <dbl> 10, 10, 11, 17, 18, 27, 40, 38,…
## $ `HepB3 (% of one-year-olds immunized)` <dbl> NA, NA, NA, NA, NA, NA, NA, NA,…
## $ `Hib3 (% of one-year-olds immunized)` <dbl> NA, NA, NA, NA, NA, NA, NA, NA,…
## $ `IPV1 (% of one-year-olds immunized)` <dbl> NA, NA, NA, NA, NA, NA, NA, NA,…
## $ `MCV1 (% of one-year-olds immunized)` <dbl> 8, 9, 14, 14, 14, 31, 34, 22, 2…
## $ `PCV3 (% of one-year-olds immunized)` <dbl> NA, NA, NA, NA, NA, NA, NA, NA,…
## $ `Pol3 (% of one-year-olds immunized)` <dbl> 5, 5, 16, 15, 11, 25, 35, 33, 2…
## $ `RCV1 (% of one-year-olds immunized)` <dbl> NA, NA, NA, NA, NA, NA, NA, NA,…
## $ `RotaC (% of one-year-olds immunized)` <dbl> NA, NA, NA, NA, NA, NA, NA, NA,…
## $ `YFV (% of one-year-olds immunized)` <dbl> NA, NA, NA, NA, NA, NA, NA, NA,…
## $ `DTP3 (% of one-year-olds immunized)` <dbl> 5, 5, 16, 15, 11, 25, 35, 33, 2…
# Normalise the column names with janitor.
vaccination_clean <- vaccination_df %>% janitor::clean_names()
# Reshape to long form: one row per entity, year and vaccine. Every column
# other than the three identifiers becomes a (vaccine, coverage) pair.
vaccination_long <- pivot_longer(
  vaccination_clean,
  cols = !c(entity, code, year),
  names_to = "vaccine",
  values_to = "coverage"
)
# Plot the pattern of missing values in the long table.
vaccination_long %>% visdat::vis_miss()
# Entity names containing "WHO", in order of first appearance.
WHO_regions <- vaccination_df$Entity %>%
  unique() %>%
  stringr::str_subset("WHO")
# Coverage over time for those entities: one panel per entity, one coloured
# line (with point markers) per vaccine.
p_vac <- ggplot(
  data = filter(vaccination_long, entity %in% WHO_regions),
  mapping = aes(x = year, y = coverage, color = vaccine)
) +
  geom_line() +
  geom_point() +
  facet_wrap(vars(entity)) +
  ggtitle("Global Vaccination Coverage by Year") +
  xlab("Year") +
  ylab("Coverage (%)")
ggplotly(p_vac)